#Factor Analysis 
#by Justin Rodgers & Jihoon Song
#March 2014

library(foreign)

# Setting up the directory and loading dataset
# NOTE(review): setwd() ties the script to one machine's absolute path; the
# relative paths used by load() below and by save() at the end of the script
# are resolved against this directory.
setwd("C:/Users/Jihoon/Desktop/_Classes/2014S_Calsses/GOV 2001 Advanced Quantitative Research Methodology/03_Replication/Community_Survey")
# load() restores the objects stored in the .RData file into the workspace.
# The rest of the script indexes an object called `x` by column name;
# presumably this file is what defines it -- TODO confirm.
load("01951Earls-PHDCN-CommunitySurvey-Main-Data.RData")

# Extracting data relevant to the study
# Keep the cluster id (NC_NUM) and the three columns COHESION, CSCAPITL and
# CONTROL, then drop every row that has an NA in any of these four columns.
varname1 <- c("NC_NUM","COHESION","CSCAPITL","CONTROL")
cohe.cont <- x[varname1]
cohe.cont2 <- na.omit(cohe.cont)

# Mean by neighborhood cluster
# Each aggregate() call returns one row per NC_NUM value holding the mean of
# the named column within that cluster.
cont.nc <- aggregate (CONTROL~NC_NUM,cohe.cont2,mean)
cohe.nc <- aggregate (COHESION~NC_NUM,cohe.cont2,mean)
cptl.nc <- aggregate (CSCAPITL~NC_NUM,cohe.cont2,mean)

# Extracting data relevant to the study 2
# NC_NUM plus 15 questionnaire items (Q11* and Q12*); rows with an NA in any
# of these 16 columns are dropped.
varname2 <- c("NC_NUM","Q11B","Q11E","Q11F","Q11K","Q11M","Q11D","Q11G","Q11N","Q11P","Q11T","Q12A","Q12B","Q12C","Q12E","Q12F")
quest <- x[varname2]
quest2 <- na.omit(quest)

# Transform the data into numeric values
# For each of the 15 item columns, keep only the first character of every
# value and convert it to a number (presumably the responses carry a leading
# numeric code such as "1 ..." -- TODO confirm against the codebook).
# Note: the anonymous function's argument shadows the outer `quest2`.
quest3 <- sapply(quest2[2:16],function(quest2) as.numeric(substr(quest2,1,1)))
# Re-attach NC_NUM as column 1, so the items occupy columns 2-16 in the
# order listed in varname2.
quest3 <- as.data.frame(cbind(quest2[1],quest3))
# Reverse-coded questions
# Columns 4 and 5 are Q11F and Q11K; 6 - value flips them (assumes a 1-5
# response scale -- TODO confirm).
quest3[,4] <- 6-quest3[,4]
quest3[,5] <- 6-quest3[,5]

# Subsetting into social cohesion, capital, and control
# quest4 = columns 2-6   (Q11B, Q11E, Q11F, Q11K, Q11M)
# quest5 = columns 7-11  (Q11D, Q11G, Q11N, Q11P, Q11T)
# quest6 = columns 12-16 (Q12A, Q12B, Q12C, Q12E, Q12F)
quest4 <- quest3[,2:6]
quest5 <- quest3[,7:11]
quest6 <- quest3[,12:16]

# Merging for factor analysis
# chcp = cohesion + capital items, chct = cohesion + control items
# (10 columns each, same rows and row order as quest3).
quest.chcp <- cbind(quest4,quest5)
quest.chct <- cbind(quest4,quest6)

# Factor Analysis of Social cohesion-capital measures
# One-factor model on the 10 cohesion + capital items; scores="regression"
# makes factanal() return per-row factor scores in $scores.
fa.chcp <- factanal(x=quest.chcp, factors=1, scores="regression")
# fasc.* keeps the score matrix, fald.* the loadings on the single factor
# (one value per item).
fasc.chcp <- fa.chcp$scores
fald.chcp <- fa.chcp$loadings[,1]
summary(fasc.chcp)

# Factor Analysis of Social cohesion-control measures
# Same one-factor setup, fitted to the 10 cohesion + control items.
fa.chct <- factanal(x=quest.chct, factors=1, scores="regression")
fasc.chct <- fa.chct$scores
fald.chct <- fa.chct$loadings[,1]
summary(fasc.chct)

# Checking loading
# Printed for visual inspection only; nothing is assigned here.
fald.chcp
fald.chct

# Calculating a combined scale using factor analysis results
# Weighted standard deviation of x with weights w.
#
# Args:
#   x: numeric vector of values.
#   w: numeric vector of weights, same length as x.
#
# Returns:
#   A single number: sqrt(V1 / (V1^2 - V2) * sum(w * (x - m)^2)), where
#   V1 = sum(w), V2 = sum(w^2) and m is the weighted mean of x.
weighted.sd <- function(x,w) {
  total.w <- sum(w)
  # Weighted mean of x.
  centre <- sum(x * w) / total.w
  # Correction factor applied to the weighted sum of squared deviations.
  correction <- total.w / (total.w^2 - sum(w^2))
  sqrt(correction * sum(w * (x - centre)^2))
}

# Re-express factor scores on the scale of the original items.
#
# Args:
#   f.scores: numeric vector or one-column matrix of factor scores, one per
#             row of raw.data.
#   raw.data: matrix or data frame of numeric item responses (one row per
#             respondent, one column per item).
#   loadings: numeric vector of factor loadings, one per column of raw.data;
#             used as the item weights.
#
# Returns:
#   An object shaped like f.scores (dimensions and column names are kept):
#   the scores standardised to mean 0 / sd 1, multiplied by the average
#   per-row weighted sd of the items and shifted by the average per-row
#   weighted mean of the items.
re.scale <- function(f.scores,raw.data,loadings) {
  # z-standardise the scores. The mean must be subtracted; adding it would
  # shift the result by 2 * mean / sd whenever the scores are not already
  # centred on 0.
  fz.scores <- (f.scores - mean(f.scores)) / sd(f.scores)
  # Loading-weighted mean and sd of the items within each row.
  means <- apply(raw.data, 1, weighted.mean, w = loadings)
  sds <- apply(raw.data, 1, weighted.sd, w = loadings)
  # Target location and spread of the rescaled scores.
  grand.mean <- mean(means)
  grand.sd <- mean(sds)
  (fz.scores * grand.sd) + grand.mean
}

# Rescale the cohesion-capital factor scores using the raw items and their
# loadings as weights, then inspect the result (summary, histogram and first
# rows are for interactive checking only).
score.chcp <- re.scale(fasc.chcp, quest.chcp, fald.chcp)
summary(score.chcp)
hist(score.chcp)
head(score.chcp)

# Same rescaling and checks for the cohesion-control factor scores.
score.chct <- re.scale(fasc.chct, quest.chct, fald.chct)
summary(score.chct)
hist(score.chct)
head(score.chct)

# Merge with neighborhood cluster information
# quest3[,1] is the NC_NUM column. The aggregate() formulas below refer to the
# resulting columns as V1 (the cluster id, which has no name in the cbind()
# call) and Factor1 (the score column).
# NOTE(review): if NC_NUM is stored as a factor, cbind() would presumably keep
# its integer codes rather than its labels -- verify.
sctb.chcp <- as.data.frame(cbind(quest3[,1],score.chcp))
head(sctb.chcp)
sctb.chct <- as.data.frame(cbind(quest3[,1],score.chct))
head(sctb.chct)
# Cluster id plus both score columns side by side.
# NOTE(review): both score columns appear to end up named Factor1 -- confirm
# before selecting from this table by name.
sctb.both <- as.data.frame(cbind(sctb.chcp,score.chct))
head(sctb.both)

# Mean by neighborhood cluster of factor analysis result
# One row per V1 value with the mean rescaled score in that cluster.
cmsc.chcp <- aggregate(Factor1~V1,sctb.chcp,mean)
cmsc.chct <- aggregate(Factor1~V1,sctb.chct,mean)
# Correlations between the columns of the two cluster-level tables; the
# Factor1/Factor1 entry compares the two sets of cluster means.
cor(cmsc.chcp,cmsc.chct)


# Reversing the scale to match with the original article
# Appends a third column equal to 6 minus the cluster mean (column 2); the
# unreversed mean is kept in column 2.
cmsc.chcp <- cbind(cmsc.chcp,6-cmsc.chcp[,2])
cmsc.chct <- cbind(cmsc.chct,6-cmsc.chct[,2])

# Changing column names
# Column 3 (the reversed score) and column 1 (the cluster id) are renamed;
# column 2 keeps the name Factor1.
colnames(cmsc.chcp)[3] <- "chcp_fac"
colnames(cmsc.chct)[3] <- "chct_fac"
colnames(cmsc.chcp)[1] <- "nc"
colnames(cmsc.chct)[1] <- "nc"

# Write the cluster-level factor scores, the respondent-level score tables and
# the cluster means of the three survey scales to factor_data.Rdata in the
# current working directory.
save(cmsc.chcp,cmsc.chct,sctb.chcp,sctb.chct,cohe.nc,cptl.nc,cont.nc,file="factor_data.Rdata")
# NOTE(review): this reads factor_data.Rdata from .../03_Replication, whereas
# the save() above writes into the working directory set by setwd()
# (.../03_Replication/Community_Survey). Unless the file is copied there by
# hand, this loads a different (or missing) file -- confirm which is intended.
load("C:/Users/Jihoon/Desktop/_Classes/2014S_Calsses/GOV 2001 Advanced Quantitative Research Methodology/03_Replication/factor_data.Rdata")
